Set params

#file <- paste0(ref_path, "/PFIRS/PFIRS 2019-2020 CT pulled 2022.xlsx")
file <- paste0(ref_path, "/PFIRS/PFIRS 2017-2022 pulled 2023.xlsx")
pulled_year <- substr(file, nchar(file)-8, nchar(file)-5)
filter_year <- 2019
min_year <-  2017
max_year <-  2022
include_NF <- "no"
print(paste("data was pulled in", pulled_year))
## [1] "data was pulled in 2023"

Read in data

xy <- read_excel(file)
## Warning: Expecting date in A8155 / R8155C1: got 'NA'
xy %>% head()
## # A tibble: 6 × 9
##   `Burn Date`         `Burn Unit`      Agency  `Acres Burned` Latitude Longitude
##   <dttm>              <chr>            <chr>            <dbl>    <dbl>     <dbl>
## 1 2017-01-02 00:00:00 PP Ridgetop Unit Cal Fi…           10       38.8     -123.
## 2 2017-01-03 00:00:00 Fairway/Bunker   Califo…            2.5     39.2     -120.
## 3 2017-01-03 00:00:00 PP Ridgetop Unit Cal Fi…           10       38.8     -123.
## 4 2017-01-03 00:00:00 FIA Forest hill  Privat…           10       39.0     -120.
## 5 2017-01-03 00:00:00 28A              US For…           51       37.7     -119.
## 6 2017-01-04 00:00:00 Plum 52          US For…            1       39.5     -121.
## # ℹ 3 more variables: `Fuel Type` <chr>, `Total Tons` <chr>, `Burn Type` <chr>

Clean

Fix column names

xy <- xy %>% 
  rename(Burn_Date = `Burn Date`) %>% 
  rename(Burn_Unit = `Burn Unit`) %>% 
  rename(Acres_Burned = `Acres Burned`) %>% 
  rename(Fuel_Type = `Fuel Type`) %>% 
  rename(Total_Tons = `Total Tons`) %>% 
  rename(Burn_Type = `Burn Type`) 

Make lat and long numeric and remove unk rows

xy <- xy %>% 
  filter(!Longitude == "UNK") %>% 
  mutate(Longitude = as.numeric(Longitude)) %>% 
  mutate(Latitude = as.numeric(Latitude))

Fix dates that should be negative

xy <- xy %>% 
  mutate(Longitude = ifelse(Longitude > 0, Longitude*(-1), Longitude))

Add year column

xy <- xy %>% 
  mutate(Year = year(Burn_Date))
summary(as.factor(xy$Year))
## 2017 2018 2019 2020 2021 2022 2023 NA's 
##  855 1276 1324 2195 2504 2492    3    1

Remove rows with NA dates or dates outside the date range

nrow_old <- nrow(xy)
xy <- xy %>% 
  filter(Year <= max_year & Year >= min_year)
deleted_n <- nrow_old - nrow(xy)
print(paste("Deleted", deleted_n, "duplicated rows"))
## [1] "Deleted 4 duplicated rows"

Delete duplicates

From Jason Branz: “Typically if there are multiple records with the same date, burn name, and acres, it’s a duplicate.”

Look at duplicates

xy %>% 
  select(Year, Burn_Date, Burn_Unit, Acres_Burned) %>% 
  filter(duplicated(.))
## # A tibble: 398 × 4
##     Year Burn_Date           Burn_Unit                         Acres_Burned
##    <dbl> <dttm>              <chr>                                    <dbl>
##  1  2017 2017-01-31 00:00:00 Pendola CreepyOregonHillPile Unit        20   
##  2  2017 2017-02-01 00:00:00 Slapjack dozer piles unit 11             14   
##  3  2017 2017-04-24 00:00:00 Delvan Pile                               2   
##  4  2017 2017-11-15 00:00:00 Challenge Landing Pile                    1   
##  5  2017 2017-11-15 00:00:00 2017-Airport East Side                    1.5 
##  6  2017 2017-11-15 00:00:00 Oregon Hill                               2   
##  7  2017 2017-11-27 00:00:00 49 Road Improvement Piles                10   
##  8  2017 2017-11-30 00:00:00 Reserves Piles                            0.25
##  9  2018 2018-01-17 00:00:00 Griff 1002                                6   
## 10  2018 2018-01-17 00:00:00 Twin Peaks 59                            20   
## # ℹ 388 more rows

Remove duplicates

nrow_old <- nrow(xy)
xy <- xy %>% 
  distinct(Burn_Date, Burn_Unit, Acres_Burned, .keep_all = T)
deleted_n <- nrow_old - nrow(xy)
print(paste("Deleted", deleted_n, "duplicated rows"))
## [1] "Deleted 398 duplicated rows"

Make spatial: All years, all places

Make spatial

sf <- st_as_sf(xy, coords = c("Longitude", "Latitude"), crs = 4326, remove = F)
mapview(sf)
## Warning in sf::st_is_longlat(x): bounding box has potentially an invalid value
## range for longlat data

Export to shapefile

layer_name = paste0("PFIRS_", min_year, "-", max_year, "_pull", pulled_year)
st_write(sf, "shapefiles", layer_name, driver = "ESRI Shapefile", delete_layer = T)
## Deleting layer `PFIRS_2017-2022_pull2023' using driver `ESRI Shapefile'
## Writing layer `PFIRS_2017-2022_pull2023' to data source 
##   `shapefiles' using driver `ESRI Shapefile'
## Updating existing layer PFIRS_2017-2022_pull2023
## Writing 10248 features with 10 fields and geometry type Point.

Save as Rdata

pfirs <- sf
save(pfirs, file = paste0("Rdata/", layer_name, ".Rdata"))
remove(layer_name)

Mask out federal lands

Crop to state of CA

CA <- st_read(dsn = paste0(ref_path, "/CA boundary/ca-state-boundary/", layer = "CA_State_TIGER2016.shp"))
## Reading layer `CA_State_TIGER2016' from data source 
##   `C:\Users\ctubbesi\OneDrive - California Air Resources Board\Documents\Reference data\CA boundary\ca-state-boundary\CA_State_TIGER2016.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 14 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -13857270 ymin: 3832931 xmax: -12705030 ymax: 5162404
## Projected CRS: WGS 84 / Pseudo-Mercator
CA <- st_transform(CA, st_crs(sf))
sf <- st_intersection(sf, CA)
## Warning in st_is_longlat(x): bounding box has potentially an invalid value
## range for longlat data

## Warning in st_is_longlat(x): bounding box has potentially an invalid value
## range for longlat data
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
load(file = "Rdata/NF.Rdata")
NF <- st_transform(NF, st_crs(sf))

This prevents an error in geometry handling:

sf_use_s2(FALSE)
## Spherical geometry (s2) switched off
sf_noNF <- st_difference(sf, NF)
## although coordinates are longitude/latitude, st_difference assumes that they
## are planar
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
mapview::mapview(list(NF, sf_noNF), col.regions=list("red","blue"),col=list("red","blue"))

Save as Rdata

layer = paste0("Rdata/PFIRS_", min_year, "_", max_year, "_pull", pulled_year, "_noNF.Rdata" )
save(sf_noNF, file = "Rdata/PFIRS_2017-2022_pull2023_noNF.Rdata")

Filter to filter_years

xy_year <- xy %>% 
  filter(Year == filter_year)
xy_year %>% head()
## # A tibble: 6 × 10
##   Burn_Date           Burn_Unit Agency Acres_Burned Latitude Longitude Fuel_Type
##   <dttm>              <chr>     <chr>         <dbl>    <dbl>     <dbl> <chr>    
## 1 2019-01-01 00:00:00 Forest M… Leoni…          1       38.6     -121. Slash    
## 2 2019-01-01 00:00:00 Leoni Me… Leoni…          2       38.6     -121. Natural  
## 3 2019-01-02 00:00:00 Foresthi… Calif…          0.1     39.0     -121. Brush    
## 4 2019-01-02 00:00:00 Forest M… Leoni…          1       38.6     -121. Slash    
## 5 2019-01-02 00:00:00 AIRPERSON Sierr…          2       38.9     -121. Slash    
## 6 2019-01-02 00:00:00 Wyntoon … Wynto…          2       41.2     -122. UNK      
## # ℹ 3 more variables: Total_Tons <chr>, Burn_Type <chr>, Year <dbl>

Make spatial

sf_year <- st_as_sf(xy_year, coords = c("Longitude", "Latitude"), crs = 4326)

Export to shapefile

layer_name = paste0("PFIRS_", filter_year, "_pull", pulled_year)
st_write(sf_year, dsn = "shapefiles", layer = layer_name, driver = "ESRI Shapefile", delete_layer = T)
## Deleting layer `PFIRS_2019_pull2023' using driver `ESRI Shapefile'
## Writing layer `PFIRS_2019_pull2023' to data source 
##   `shapefiles' using driver `ESRI Shapefile'
## Updating existing layer PFIRS_2019_pull2023
## Writing 1288 features with 8 fields and geometry type Point.

Write to .Rdata

file = paste0("Rdata/PFIRS_", filter_year, "_pull", pulled_year, ".Rdata" )
save(sf_year, file = layer)